# Suppress every warning for the remainder of the session.
# NOTE(review): this also hides readr parsing warnings and ggplot2's
# "removed rows" warnings triggered by the fixed axis limits used below;
# consider dropping it while debugging.
options(warn = -1)

# Remember the directory the script was started from. All data and output
# paths below are relative to it, so no setwd() call is needed
# (setwd(getwd()) was a no-op).
argv <- getwd()

# Load libraries 
if (!require("pacman")) install.packages("pacman", 
                                         repos = "http://cran.us.r-project.org")

pacman::p_load(dplyr,tidyverse,extrafont,ggplot2,xtable)

# Directory that holds the input CSV files
DATA_LOC <- "./data"

# Values the figure/table loops below iterate over:
# rater types, outcome measures and sample identifiers
rater_types <- c("NG", "MBFC")
outcome_types <- c("Exposure", "Engagement")
sample_types <- paste0("sample_", 1:2)

# Shared look for every figure: minimal theme, solid black axis lines,
# Helvetica text, no vertical major grid lines, large axis labels and a
# bold, centred title
custom_theme <- local({
  # both axes use the same line element, so build it once
  axis_rule <- element_line(colour = "black", size = 0.5, linetype = "solid")

  theme_minimal() +
    theme(
      text = element_text(family = "Helvetica"),
      plot.title = element_text(size = 24, hjust = 0.5, face = "bold"),
      axis.title = element_text(size = 22),
      axis.text = element_text(size = 20),
      axis.line.x = axis_rule,
      axis.line.y = axis_rule,
      panel.grid.major.x = element_blank()
    )
})


# Lookup table: plotting colour for each information-quality type (HQ / LQ)
rating_colors <- data.frame(
  info_type = c("HQ", "LQ"),
  color = c("#009E73", "#D55E00")
)


# Lookup table: plotting colour for each query type
query_colors <- data.frame(
  query = c("Navigational", "Non-Navigational"),
  color = c("#CC79A7", "#56B4E9")
)



######################################################
### Fig 1
######################################################

# Read every file in DATA_LOC whose name contains "total_info", attach the
# plotting colour for each info_type, and convert proportions to percentages.
# list.files() takes a regular expression, not a glob, so no "*" is needed
# (the old "total_info*" meant "total_inf" followed by any number of "o").
df <- list.files(path = DATA_LOC,
                 pattern = "total_info",
                 full.names = TRUE) %>%
  map_df(~read_csv(., col_types = cols())) %>%
  # join on the quality label only, instead of on whatever columns happen
  # to share a name
  left_join(rating_colors, by = "info_type") %>%
  # "HQ" rows are labelled "Reliable Sites", any other value
  # "Unreliable Sites"; prop is rescaled from a proportion to a percentage
  mutate(info_type = ifelse(info_type == "HQ",
                            "Reliable Sites", "Unreliable Sites"),
         prop = prop * 100)

# pdf() cannot create missing directories, so make sure the target exists
dir.create("./output/plots", recursive = TRUE, showWarnings = FALSE)

# One bar chart per sample, written to ./output/plots/total_info_<sample>.pdf
for (i in sample_types) {
  temp_df <- df %>%
    filter(sample == i)

  # the colour column holds literal hex codes, hence scale_fill_identity();
  # note that ylim() drops bars whose value falls outside 0-45
  p1 <- ggplot(temp_df, aes(x = info_type, y = prop, fill = color)) +
    geom_bar(stat = "identity", width = 0.50) +
    labs(title = "Search Result Information Quality",
         y = "Percentage of Total Search Results") +
    custom_theme +
    theme(axis.title.x = element_blank()) +
    scale_fill_identity() +
    ylim(0, 45)

  pdf(paste0("./output/plots/total_info_", i, ".pdf"), height = 7, width = 9)
  print(p1)
  dev.off()
}


######################################################
### Fig 2
######################################################
# Read every file in DATA_LOC whose name contains "quality_ranks", attach the
# plotting colour for each info_type, and convert proportions to percentages.
# list.files() takes a regular expression, not a glob, so no "*" is needed.
df <- list.files(path = DATA_LOC,
                 pattern = "quality_ranks",
                 full.names = TRUE) %>%
  map_df(~read_csv(., col_types = cols())) %>%
  # join on the quality label only, instead of on whatever columns happen
  # to share a name
  left_join(rating_colors, by = "info_type") %>%
  # "HQ" rows are labelled "Reliable Sites", any other value
  # "Unreliable Sites"; prop is rescaled from a proportion to a percentage
  mutate(info_type = ifelse(info_type == "HQ",
                            "Reliable Sites", "Unreliable Sites"),
         prop = prop * 100)

# pdf() cannot create missing directories, so make sure the target exists
dir.create("./output/plots", recursive = TRUE, showWarnings = FALSE)

# One dot plot per sample, written to ./output/plots/lq_ranks_<sample>.pdf
for (i in sample_types) {
  temp_df <- df %>%
    filter(sample == i)

  # The colour column holds literal hex codes, hence scale_color_identity().
  # breaks are given explicitly (HQ colour first, LQ colour second) so the
  # legend labels are tied to the right colours instead of depending on the
  # sort order of the hex strings.
  p1 <- ggplot() +
    geom_point(data = temp_df,
               aes(x = factor(rank), y = prop, group = info_type,
                   color = color),
               size = 5) +
    labs(title = "Search Result Information Quality Across Ranks",
         y = "Percentage of Total Search Results", x = "Search Result Rank",
         color = "") +
    custom_theme +
    theme(legend.position = "bottom",
          legend.text = element_text(size = 16)) +
    scale_color_identity(guide = "legend",
                         breaks = as.character(rating_colors$color),
                         labels = c("Reliable Sites", "Unreliable Sites")) +
    ylim(0, 50)

  pdf(paste0("./output/plots/lq_ranks_", i, ".pdf"), height = 7, width = 9)
  print(p1)
  dev.off()
}


######################################################
### Fig 3 (B-E)
######################################################
# Read every file in DATA_LOC whose name contains "nav_likelihood" and attach
# the plotting colour for each query type.
# list.files() takes a regular expression, not a glob, so no "*" is needed.
df <- list.files(path = DATA_LOC,
                 pattern = "nav_likelihood",
                 full.names = TRUE) %>%
  map_df(~read_csv(., col_types = cols())) %>%
  # join on the query type only, instead of on whatever columns happen to
  # share a name
  left_join(query_colors, by = "query")

# pdf() cannot create missing directories, so make sure the target exists
dir.create("./output/plots", recursive = TRUE, showWarnings = FALSE)

# One bar chart per sample x outcome x rater combination, written to
# ./output/plots/lq_likelihood_<sample>_<outcome>_<rater>.pdf
for (i in sample_types) {
  for (j in outcome_types) {
    for (k in rater_types) {
      temp_df <- df %>%
        filter(sample == i & measure == j & rater == k)

      # the y-axis label is the same for both outcomes; only the title
      # wording and the y-axis upper limit depend on the outcome
      y_name <- paste("Likelihood of", j, sep = " ")
      if (j == "Exposure") {
        title_name <- paste("Likelihood of", j, "to Unreliable Sites",
                            sep = " ")
        y_max <- .3
      } else {
        title_name <- paste("Likelihood of", j, "with Unreliable Sites",
                            sep = " ")
        y_max <- 1
      }

      # "liklihood" (sic) is the column name as spelled in the input data.
      # The x label set in labs() is hidden by axis.title.x = element_blank().
      p1 <- ggplot(temp_df, aes(x = query, y = liklihood, fill = color)) +
        geom_bar(stat = "identity", width = 0.50) +
        labs(title = title_name,
             y = y_name, x = "Result Rank") +
        custom_theme +
        scale_fill_identity() +
        theme(axis.title.x = element_blank()) +
        scale_y_continuous(labels = scales::label_number(accuracy = 0.01),
                           limits = c(0, y_max))

      pdf(paste0("./output/plots/lq_likelihood_", i, "_", j, "_", k, ".pdf"),
          height = 7, width = 9)
      print(p1)
      dev.off()
    }
  }
}



######################################################
### Fig 3 (F & G)
######################################################
# Read every file in DATA_LOC whose name contains "nav_diff".
# list.files() takes a regular expression, not a glob, so no "*" is needed.
df <- list.files(path = DATA_LOC,
                 pattern = "nav_diff",
                 full.names = TRUE) %>%
  map_df(~read_csv(., col_types = cols()))

# pdf() cannot create missing directories, so make sure the target exists
dir.create("./output/plots", recursive = TRUE, showWarnings = FALSE)

# One point-range plot per sample x rater combination, written to
# ./output/plots/nav_lq_diff_<sample>_<rater>.pdf
for (i in sample_types) {
  for (k in rater_types) {
    temp_df <- df %>%
      filter(sample == i & rater == k)

    # points show Difference per rank; bars span Difference +/- 1.96 * SE
    # (presumably a 95% confidence interval - confirm against the analysis);
    # the dashed horizontal line marks zero difference
    p1 <- ggplot() +
      geom_pointrange(data = temp_df,
                      aes(x = factor(Rank), y = Difference,
                          ymin = Difference - (1.96 * SE),
                          ymax = Difference + (1.96 * SE)),
                      size = 1.25) +
      labs(title = "Differences in Exposure to Unreliable Sites",
           y = "Difference in Exposure to Unreliable Sites ",
           x = "Search Result Rank") +
      custom_theme +
      ylim(-.20, 1) +
      geom_hline(yintercept = 0, linetype = 2)

    pdf(paste0("./output/plots/nav_lq_diff_", i, "_", k, ".pdf"),
        height = 7, width = 9)
    print(p1)
    dev.off()
  }
}


######################################################
### Table 1
######################################################

# Read every file in DATA_LOC whose name contains "nav_amounts"
df <- list.files(path = DATA_LOC,
                 pattern = "nav_amounts",
                 full.names = TRUE) %>%
  map_df(~read_csv(., col_types = cols()))

# print.xtable() cannot create missing directories, so make sure the target
# exists
dir.create("./output/tables", recursive = TRUE, showWarnings = FALSE)

# One LaTeX table per rater, written to ./output/tables/nav_values_<rater>.tex
for (k in rater_types) {
  # Keep only the three reported columns, rename them for display, and turn
  # the values into percentages rounded to two decimals.
  # The earlier select(-c(X1, rater)) was dropped: the select() below already
  # discards every other column, and the X1 column only exists under older
  # readr versions (newer ones name a blank header "...1"), which made the
  # script error.
  # NOTE(review): the X..-style source names are what the input header is
  # expected to contain - confirm against the CSV files.
  iter_df <- df %>%
    filter(rater == k) %>%
    select("% Nav. Searches" = `X..Nav..Searches.`,
           "% Exposure from Nav." = `X..LQ.Exposure.from.Nav..Searches.`,
           "% Engagement from Nav." = `X..LQ.Engagement.from.Nav..Searches.`) %>%
    mutate_all(~ round(. * 100, 2))

  table_out <- xtable(iter_df)

  print(table_out,
        file = paste0("./output/tables/nav_values_", k, ".tex"),
        include.rownames = FALSE, table.environment = "table")
}


######################################################
### Descriptive Table 
######################################################

# Read every file in DATA_LOC whose name contains "descriptive".
# list.files() takes a regular expression, not a glob; the old "*descriptive"
# started with a quantifier that has nothing to repeat, which is not a
# well-formed regex, so the plain substring is used instead.
df <- list.files(path = DATA_LOC,
                 pattern = "descriptive",
                 full.names = TRUE) %>%
  map_df(~read_csv(., col_types = cols()))

# print.xtable() cannot create missing directories, so make sure the target
# exists
dir.create("./output/tables", recursive = TRUE, showWarnings = FALSE)

# Write the combined data as a LaTeX table without row names
table_out <- xtable(df)

print(table_out, file = "./output/tables/descriptives.tex",
      include.rownames = FALSE, table.environment = "table")
